{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Autoscaling a SageMaker Endpoint"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import boto3\n",
    "import sagemaker\n",
    "import pandas as pd\n",
    "\n",
    "sess = sagemaker.Session()\n",
    "bucket = sess.default_bucket()\n",
    "role = sagemaker.get_execution_role()\n",
    "region = boto3.Session().region_name\n",
    "\n",
    "sm = boto3.Session().client(service_name=\"sagemaker\", region_name=region)\n",
    "autoscale = boto3.Session().client(service_name=\"application-autoscaling\", region_name=region)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "%store -r tensorflow_endpoint_name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "try:\n",
    "    tensorflow_endpoint_name\n",
    "    print(\"[OK]\")\n",
    "except NameError:\n",
    "    print(\"+++++++++++++++++++++++++++++++\")\n",
    "    print(\"[ERROR] Please run the notebooks in the previous notebook before you continue.\")\n",
    "    print(\"+++++++++++++++++++++++++++++++\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(tensorflow_endpoint_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Copy the Model to the Notebook"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "autoscale.register_scalable_target(\n",
    "    ServiceNamespace=\"sagemaker\",\n",
    "    ResourceId=\"endpoint/\" + tensorflow_endpoint_name + \"/variant/AllTraffic\",\n",
    "    ScalableDimension=\"sagemaker:variant:DesiredInstanceCount\",\n",
    "    MinCapacity=1,\n",
    "    MaxCapacity=2,\n",
    "    RoleARN=role,\n",
    "    SuspendedState={\n",
    "        \"DynamicScalingInSuspended\": False,\n",
    "        \"DynamicScalingOutSuspended\": False,\n",
    "        \"ScheduledScalingSuspended\": False,\n",
    "    },\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# check the target is available\n",
    "autoscale.describe_scalable_targets(\n",
    "    ServiceNamespace=\"sagemaker\",\n",
    "    MaxResults=100,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "autoscale.put_scaling_policy(\n",
    "    PolicyName=\"bert-reviews-autoscale-policy\",\n",
    "    ServiceNamespace=\"sagemaker\",\n",
    "    ResourceId=\"endpoint/\" + tensorflow_endpoint_name + \"/variant/AllTraffic\",\n",
    "    ScalableDimension=\"sagemaker:variant:DesiredInstanceCount\",\n",
    "    PolicyType=\"TargetTrackingScaling\",\n",
    "    TargetTrackingScalingPolicyConfiguration={\n",
    "        \"TargetValue\": 2.0,\n",
    "        \"PredefinedMetricSpecification\": {\n",
    "            \"PredefinedMetricType\": \"SageMakerVariantInvocationsPerInstance\",\n",
    "        },\n",
    "        \"ScaleOutCooldown\": 60,\n",
    "        \"ScaleInCooldown\": 300,\n",
    "    },\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "from IPython.core.display import display, HTML\n",
    "\n",
    "display(\n",
    "    HTML(\n",
    "        '<b>Review <a target=\"blank\" href=\"https://console.aws.amazon.com/sagemaker/home?region={}#/endpoints/{}\">SageMaker REST Endpoint</a></b>'.format(\n",
    "            region, tensorflow_endpoint_name\n",
    "        )\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "\n",
    "waiter = sm.get_waiter(\"endpoint_in_service\")\n",
    "waiter.wait(EndpointName=tensorflow_endpoint_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Test the Deployed Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "from sagemaker.tensorflow.model import TensorFlowPredictor\n",
    "from sagemaker.serializers import JSONLinesSerializer\n",
    "from sagemaker.deserializers import JSONLinesDeserializer\n",
    "\n",
    "predictor = TensorFlowPredictor(\n",
    "    endpoint_name=tensorflow_endpoint_name,\n",
    "    sagemaker_session=sess,\n",
    "    model_name=\"saved_model\",\n",
    "    model_version=0,\n",
    "    content_type=\"application/jsonlines\",\n",
    "    accept_type=\"application/jsonlines\",\n",
    "    serializer=JSONLinesSerializer(),\n",
    "    deserializer=JSONLinesDeserializer(),\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Waiting for the Endpoint to be ready to Serve Predictions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "\n",
    "time.sleep(30)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Run a Lot of Predictions and Watch the SageMaker Endpoint Scale Out"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.core.display import display, HTML\n",
    "\n",
    "display(\n",
    "    HTML(\n",
    "        '<b>Review <a target=\"blank\" href=\"https://console.aws.amazon.com/sagemaker/home?region={}#/endpoints/{}\">SageMaker REST Endpoint</a></b>'.format(\n",
    "            region, tensorflow_endpoint_name\n",
    "        )\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "inputs = [{\"features\": [\"This is great!\"]}, {\"features\": [\"This is bad.\"]}]\n",
    "\n",
    "for i in range(0, 100000):\n",
    "    predicted_classes = predictor.predict(inputs)\n",
    "\n",
    "    for predicted_class in predicted_classes:\n",
    "        print(\"Predicted star_rating: {}\".format(predicted_class))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "autoscale.describe_scaling_activities(\n",
    "    ServiceNamespace=\"sagemaker\",\n",
    "    ResourceId=\"endpoint/\" + tensorflow_endpoint_name + \"/variant/AllTraffic\",\n",
    "    ScalableDimension=\"sagemaker:variant:DesiredInstanceCount\",\n",
    "    MaxResults=100\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Delete Endpoint\n",
    "To save cost, we should delete the endpoint."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# sm.delete_endpoint(\n",
    "#      EndpointName=tensorflow_endpoint_name\n",
    "# )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%html\n",
    "\n",
    "<p><b>Shutting down your kernel for this notebook to release resources.</b></p>\n",
    "<button class=\"sm-command-button\" data-commandlinker-command=\"kernelmenu:shutdown\" style=\"display:none;\">Shutdown Kernel</button>\n",
    "        \n",
    "<script>\n",
    "try {\n",
    "    els = document.getElementsByClassName(\"sm-command-button\");\n",
    "    els[0].click();\n",
    "}\n",
    "catch(err) {\n",
    "    // NoOp\n",
    "}    \n",
    "</script>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%javascript\n",
    "\n",
    "try {\n",
    "    Jupyter.notebook.save_checkpoint();\n",
    "    Jupyter.notebook.session.delete();\n",
    "}\n",
    "catch(err) {\n",
    "    // NoOp\n",
    "}"
   ]
  }
 ],
 "metadata": {
  "instance_type": "ml.t3.medium",
  "kernelspec": {
   "display_name": "Python 3 (Data Science)",
   "language": "python",
   "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
